Building a Map of Australia: Choropleth and Contiguous Cartogram

Author

C21380786

Published

April 17, 2025

# Function to quietly install and load packages
quiet_library <- function(pkg) {
  suppressMessages(suppressWarnings({
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, quiet = TRUE)
    }
    library(pkg, character.only = TRUE)
  }))
}
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Core libraries
quiet_library("ggplot2")
quiet_library("sf")
quiet_library("ggspatial")
quiet_library("rnaturalearth")
quiet_library("rnaturalearthdata")
quiet_library("geojsonio")
quiet_library("leaflet")
quiet_library("magick")
quiet_library("tidyverse")
quiet_library("viridisLite")
quiet_library("geosphere")
quiet_library("ggrepel")
quiet_library("cartogram")
quiet_library("rmapshaper")
quiet_library("tmap")


quiet_library("wordcloud")
quiet_library("RColorBrewer")
quiet_library("stopwords")
quiet_library("textdata")
quiet_library("tidytext")
quiet_library("stringr")
quiet_library("tidyr")
quiet_library("collapsibleTree")
quiet_library("text2vec")
quiet_library("dplyr")
quiet_library("widyr")
quiet_library("igraph")
quiet_library("ggraph")

Base Map of Australia

australia <- ne_countries(scale = "medium", country = "Australia", returnclass = "sf")

ggplot(data = australia) +
  geom_sf(fill = "antiquewhite") +
  theme_minimal() +
  ggtitle("Base Map of Australia")

Overlay Major Cities

cities <- data.frame(
  name = c("Sydney", "Melbourne", "Brisbane", "Perth", "Adelaide", "Darwin", "Canberra", "Hobart"),
  lon  = c(151.2093, 144.9631, 153.0251, 115.8575, 138.6007, 130.8456, 149.1300, 147.3272),
  lat  = c(-33.8688, -37.8136, -27.4698, -31.9505, -34.9285, -12.4634, -35.2809, -42.8821)
)

cities_sf <- st_as_sf(cities, coords = c("lon", "lat"), crs = 4326)

base_plot <- ggplot() +
  geom_sf(data = australia, fill = "antiquewhite") +
  geom_sf(data = cities_sf, color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), size = 3,
                  nudge_y = 1, nudge_x = 1) +
  theme_minimal() +
  ggtitle("Australia Map with Major Cities")
base_plot

Overlay State Boundaries

states <- st_read("gadm41_AUS_1.json", quiet = TRUE)

map_with_states <- base_plot +
  geom_sf(data = states, fill = NA, color = "blue", size = 0.7) +
  ggtitle("Australia Map with Cities and State Boundaries")
map_with_states

Overlay Route

# Coordinates for Sydney and Perth
sydney <- cities %>% filter(name == "Sydney")
perth  <- cities %>% filter(name == "Perth")

# Plot with route line
map_with_route <- ggplot() +
  geom_sf(data = states, fill = NA, color = "blue", size = 0.5) +
  geom_sf(data = australia, fill = "antiquewhite", color = "gray70") +

  # Cities
  geom_point(data = cities, aes(x = lon, y = lat), color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), vjust = -1) +

  # Curved route from Sydney to Perth
  geom_curve(aes(x = sydney$lon, y = sydney$lat,
                 xend = perth$lon, yend = perth$lat),
             curvature = 0.2, color = "purple", linewidth = 1,
             arrow = arrow(length = unit(0.15, "inches"))) +

  # Map scale and north arrow
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "tl", which_north = "true", 
                         style = north_arrow_fancy_orienteering) +

  # Theme and title
  theme_minimal() +
  ggtitle("Australia with Major Cities and Route: Sydney to Perth")

map_with_route
Scale on map varies by more than 10%, scale bar may be inaccurate

Add Median Income by State

# Read the CSV
income <- read.csv("medianincomeaustralia.csv")

# Join directly using matching NAME_1 and State columns
states_income <- left_join(states, income, by = c("NAME_1" = "State"))

# Plot the choropleth map
choropleth_map <- ggplot() +
  geom_sf(data = states_income, aes(fill = mdn_income), color = "black") +
  scale_fill_viridis_c(option = "magma", name = "Median Income") +
  geom_sf(data = australia, fill = NA, color = "gray50") +
  geom_sf(data = cities_sf, color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), size = 3) +
  theme_minimal() +
  ggtitle("Choropleth Map of Australia: Median Income (Aug 2024)")
choropleth_map

Annotate Map with Scale and Compass

choropleth_map_annotated <- choropleth_map +
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "bl", which_north = "true",
                         pad_x = unit(0.1, "in"), pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering)
choropleth_map_annotated
Scale on map varies by more than 10%, scale bar may be inaccurate

Reproject to EPSG:28355 (Australia Transverse Mercator)

australia_28355 <- st_transform(australia, crs = 28355)
states_income_28355 <- st_transform(states_income, crs = 28355)
cities_sf_28355 <- st_transform(cities_sf, crs = 28355)

reprojected_map <- ggplot() +
  geom_sf(data = australia_28355, fill = "antiquewhite") +
  geom_sf(data = states_income_28355, aes(fill = mdn_income), color = "black") +
  scale_fill_viridis_c(option = "magma") +
  geom_sf(data = cities_sf_28355, color = "red", size = 3) +
  geom_text_repel(data = as.data.frame(st_coordinates(cities_sf_28355)) %>% bind_cols(cities),
                  aes(x = X, y = Y, label = name), size = 3) +
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "bl", which_north = "true",
                         pad_x = unit(0.1, "in"), pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering) +
  theme_minimal() +
  ggtitle("Reprojected Choropleth Map of Australia (EPSG:28355)")
reprojected_map

Create Simplified State Shapes

states_simplified <- ms_simplify(states_income_28355, keep = 0.05, keep_shapes = TRUE)

Generate Cartogram

states_cartogram <- cartogram_cont(states_simplified, "mdn_income", itermax = 5)
Warning in cartogram_cont.sf(states_simplified, "mdn_income", itermax = 5): NA
not allowed in weight vector. Features will be removed from Shape.

Plot Cartogram Using tmap

tmap_mode("plot")
ℹ tmap mode set to "plot".
cartogram_map <- tm_shape(states_cartogram) +
  tm_polygons("mdn_income", palette = "Blues", style = "jenks", title = "Median Income") +
  tm_scale_bar(position = c("left", "bottom")) +
  tm_compass(position = c("right", "top"), type = "arrow") +
  tm_layout(title = "Contiguous Cartogram of Australia (Median Income)",
            inner.margins = c(0.1, 0.1, 0.1, 0.1))

── tmap v3 code detected ───────────────────────────────────────────────────────
[v3->v4] `tm_polygons()`: instead of `style = "jenks"`, use fill.scale =
`tm_scale_intervals()`.
ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
  'tm_scale_intervals(<HERE>)'[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'! `tm_scale_bar()` is deprecated. Please use `tm_scalebar()` instead.[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(title = )`
cartogram_map
[cols4all] color palettes: use palettes from the R package cols4all. Run
`cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
"brewer.blues"
Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

[plot mode] fit legend/component: Some legend items or map compoments do not
fit well, and are therefore rescaled.
ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

# Prepare spatial layers in WGS84 (required for leaflet)
states_income_leaflet <- st_transform(states_income, crs = 4326)
cities_sf_leaflet <- st_transform(cities_sf, crs = 4326)

# Create base leaflet map
leaflet_map <- leaflet() %>%
  addTiles(group = "Base Map") %>%

  # Add state boundaries
  addPolygons(data = states_income_leaflet,
              color = "blue", weight = 1, fill = FALSE,
              label = ~NAME_1,
              group = "State Boundaries") %>%

  # Add income-based fill choropleth
  addPolygons(data = states_income_leaflet,
              fillColor = ~colorNumeric("YlOrRd", mdn_income)(mdn_income),
              fillOpacity = 0.7, weight = 1, color = "#444444",
              label = ~paste(NAME_1, "<br>Income:", round(mdn_income, 2)),
              highlightOptions = highlightOptions(weight = 2, color = "black", bringToFront = TRUE),
              group = "Income Choropleth") %>%

  # Add cities
  addCircleMarkers(data = cities_sf_leaflet,
                   radius = 5,
                   color = "red",
                   stroke = TRUE,
                   fillOpacity = 0.9,
                   label = ~name,
                   group = "Major Cities") %>%

  # Add layer controls
  addLayersControl(
    overlayGroups = c("State Boundaries", "Income Choropleth", "Major Cities"),
    options = layersControlOptions(collapsed = FALSE)
  )

leaflet_map
sample_proclamation <- c(
  "we declare the right of the people of ireland",
  "we place the cause of the irish republic under the protection of god",
  "we ask the blessing of the nation and of humanity",
  "we resolve to pursue the happiness and prosperity of the nation",
  "we strike in full confidence of victory",
  "we proclaim the irish republic as a sovereign independent state",
  "we pledge our lives to the cause of freedom",
  "we call upon our fellow countrymen to support us",
  "we stand ready to defend our nation and its rights",
  "we honour the sacrifice of those who came before us"
)

proclamation_df <- tibble(line = 1:10, text = sample_proclamation)

Step 1: N-Grams (Bigrams & Trigrams)

# Bigrams
bigrams <- proclamation_df %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2)

# Trigrams
trigrams <- proclamation_df %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3)

# Separate & remove stop words

data("stop_words")

bigrams_separated <- bigrams %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word, !word2 %in% stop_words$word)

trigrams_separated <- trigrams %>%
  separate(trigram, into = c("word1", "word2", "word3"), sep = " ") %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word,
         !word3 %in% stop_words$word)

# View top bigrams
bigrams_separated %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)
# A tibble: 4 × 2
  bigram                    n
  <chr>                 <int>
1 irish republic            2
2 fellow countrymen         1
3 sovereign independent     1
4 stand ready               1

Step 2: Bag of Words

words <- proclamation_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

words
# A tibble: 33 × 2
   word           n
   <chr>      <int>
 1 nation         3
 2 irish          2
 3 republic       2
 4 blessing       1
 5 call           1
 6 confidence     1
 7 countrymen     1
 8 declare        1
 9 defend         1
10 fellow         1
# ℹ 23 more rows

Step 3: Word Cloud

set.seed(123)
wordcloud(words = words$word,
          freq = words$n,
          min.freq = 1,
          max.words = 100,
          colors = brewer.pal(8, "Dark2"))

Step 4: Semantic Network (Co-occurrence)

co_occurrence <- proclamation_df %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  pairwise_count(word, line, sort = TRUE, upper = FALSE)
Joining with `by = join_by(word)`
# Plot top co-occurring pairs
co_occurrence %>%
  filter(n >= 2) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_void() +
  ggtitle("Semantic Network: Co-occurring Words")

Step 5: D3 Collapsible Word Tree

# Tokenize proclamation sentences
tokenized <- word_tokenizer(tolower(sample_proclamation))

# Find maximum sentence length
max_len <- max(lengths(tokenized))

# Pad and build data frame
word_tree_data <- tokenized %>%
  lapply(function(x) c(x, rep(NA, max_len - length(x)))) %>%
  do.call(rbind, .) %>%
  as.data.frame(stringsAsFactors = FALSE)

# Set column names
colnames(word_tree_data) <- paste0("word", 1:ncol(word_tree_data))

# Generate collapsible word tree from root "we"
collapsibleTree(word_tree_data, hierarchy = colnames(word_tree_data), root = "we")